library(tidycensus)
library(tidyverse)
library(tmap)
library(sf)
# Register the Census API key with tidycensus; replace the placeholder string
# with your own key before running any of the calls below.
census_api_key(key = "YOUR API KEY GOES HERE")

Inspect variables for Decennial Census

# Load the variable dictionary for the 2010 Decennial Census ("sf1" dataset)
# and open it in the data viewer.
decennial_variables <- load_variables(year = 2010, dataset = "sf1")
View(decennial_variables)
name label concept
H001001 Total HOUSING UNITS
H002001 Total URBAN AND RURAL
H002002 Total!!Urban URBAN AND RURAL
H002003 Total!!Urban!!Inside urbanized areas URBAN AND RURAL
H002004 Total!!Urban!!Inside urban clusters URBAN AND RURAL
H002005 Total!!Rural URBAN AND RURAL

Calling and Manipulating Data using tidycensus and dplyr: Basics

Calling and Viewing Data

Let’s find out the population, by state, in 2010:

# Request variable P001001 for every state from the 2010 Decennial Census,
# together with feature geometry so the result can be mapped later.
# NOTE(review): `shift_geo` may be deprecated in newer tidycensus releases in
# favour of tigris::shift_geometry() -- confirm against the installed version.
state_population_2010 <- get_decennial(
  geography = "state",
  variables = "P001001",
  year = 2010,
  geometry = TRUE,
  shift_geo = TRUE
)
View(state_population_2010)
GEOID NAME variable value geometry
04 Arizona P001001 6392017 MULTIPOLYGON (((-1111066 -8…
05 Arkansas P001001 2915918 MULTIPOLYGON (((557903.1 -1…
06 California P001001 37253956 MULTIPOLYGON (((-1853480 -9…
08 Colorado P001001 5029196 MULTIPOLYGON (((-613452.9 -…
09 Connecticut P001001 3574097 MULTIPOLYGON (((2226838 519…
11 District of Columbia P001001 601723 MULTIPOLYGON (((1960720 -41…

We can adjust the geography and year parameters; let’s say we want the population distribution across CO counties in the year 2010:

# Same variable (P001001) as the state-level call, but requested per county
# and restricted to Colorado.
CO_county_population_2010 <- get_decennial(
  geography = "county",
  state = "CO",
  variables = "P001001",
  year = 2010
)
View(CO_county_population_2010)
GEOID NAME variable value
08023 Costilla County, Colorado P001001 3524
08025 Crowley County, Colorado P001001 5823
08027 Custer County, Colorado P001001 4255
08029 Delta County, Colorado P001001 30952
08031 Denver County, Colorado P001001 600158
08035 Douglas County, Colorado P001001 285465

Cleaning and Manipulating Data Using dplyr

Let’s clean up the table by removing the “variable” column, and renaming the “value” column as “Population”, which we can do using functions from the “dplyr” package.

# Colorado county populations (P001001), tidied for display: the `variable`
# column is dropped and `value` is renamed to `population`.
CO_county_population_2010 <- get_decennial(
  geography = "county",
  state = "CO",
  variables = "P001001",
  year = 2010 # must be 2010 to match the object name and the accompanying text
) %>%
  select(-variable) %>%
  rename(population = value)

View(CO_county_population_2010)
GEOID NAME population
08001 Adams County, Colorado 363857
08003 Alamosa County, Colorado 14966
08005 Arapahoe County, Colorado 487967
08007 Archuleta County, Colorado 9898
08009 Baca County, Colorado 4517
08011 Bent County, Colorado 5998

It’s also possible to call multiple variables into a single table. To see this, let’s add a field/column containing the rural population in each state in 2010 (as well as the total population in that year), and order the dataset in descending order with respect to the rural population (such that the state with the largest rural population will appear as the first record in the dataset):

# Pull two variables at once in wide form (one column per variable), give the
# columns readable names, and put the largest rural populations first.
state_pop_ruralpop_2010 <- get_decennial(
  geography = "state",
  variables = c("P001001", "P002005"),
  year = 2010,
  output = "wide"
) %>%
  rename(
    total_population = P001001,
    rural_population = P002005
  ) %>%
  arrange(desc(rural_population))

state_pop_ruralpop_2010
GEOID NAME total_population rural_population
48 Texas 25145561 3847522
37 North Carolina 9535483 3233727
42 Pennsylvania 12702379 2711092
39 Ohio 11536504 2546810
26 Michigan 9883640 2513683
13 Georgia 9687653 2415502

Let’s generate a new variable based on the variables we already have in the dataset. This variable will measure the percentage of each state’s population that lives in rural areas (calculated by dividing the rural population by the total population, and multiplying by 100). We’ll call this variable “rural_pct”. We’ll also re-sort the dataset, so that it’s sorted in descending order with respect to the new “rural_pct” variable, instead of the actual number of rural residents:

# Add the rural share of each state's population (in percent) and re-sort the
# table so the most rural states come first.
state_pop_ruralpop_2010 <- state_pop_ruralpop_2010 %>%
  mutate(rural_pct = 100 * (rural_population / total_population)) %>%
  arrange(desc(rural_pct))

View(state_pop_ruralpop_2010)
GEOID NAME total_population rural_population rural_pct
23 Maine 1328361 814819 61.34018
50 Vermont 625741 382356 61.10451
54 West Virginia 1852994 950184 51.27831
28 Mississippi 2967297 1503073 50.65462
30 Montana 989415 436401 44.10697
05 Arkansas 2915918 1278329 43.83968

The dplyr package also makes it easy to filter datasets based on specific criteria, which we can then assign to a new object. For example, let’s say that we want to generate a new dataset that only includes states whose rural populations are greater than 40% of their overall populations. We’ll assign this new dataset to an object called “rural_pct_over40”:

# Keep only the states whose rural share is strictly above 40 percent.
rural_pct_over40 <- filter(state_pop_ruralpop_2010, rural_pct > 40)
View(rural_pct_over40)
GEOID NAME total_population rural_population rural_pct
23 Maine 1328361 814819 61.34018
50 Vermont 625741 382356 61.10451
54 West Virginia 1852994 950184 51.27831
28 Mississippi 2967297 1503073 50.65462
30 Montana 989415 436401 44.10697
05 Arkansas 2915918 1278329 43.83968
46 South Dakota 814180 352933 43.34828
21 Kentucky 4339367 1806024 41.61953
01 Alabama 4779736 1957932 40.96318
38 North Dakota 672591 269719 40.10149

Student Exercise

Create a dataset of Colorado counties whose rural population exceeded 50% of the county’s overall population in 2010, and sort the dataset in descending order with respect to the field containing the percentage of the county’s residents who are rural. Your final dataset should look something like this:

GEOID NAME total_population rural_population rural_pct
08023 Costilla County, Colorado 3524 3524 100.00000
08025 Crowley County, Colorado 5823 5823 100.00000
08027 Custer County, Colorado 4255 4255 100.00000
08033 Dolores County, Colorado 2064 2064 100.00000
08039 Elbert County, Colorado 23086 23086 100.00000
08047 Gilpin County, Colorado 5441 5441 100.00000
08053 Hinsdale County, Colorado 843 843 100.00000
08057 Jackson County, Colorado 1394 1394 100.00000
08061 Kiowa County, Colorado 1398 1398 100.00000
08073 Lincoln County, Colorado 5467 5467 100.00000
08079 Mineral County, Colorado 712 712 100.00000
08091 Ouray County, Colorado 4436 4436 100.00000
08093 Park County, Colorado 16206 16206 100.00000
08095 Phillips County, Colorado 4442 4442 100.00000
08111 San Juan County, Colorado 699 699 100.00000
08109 Saguache County, Colorado 6108 6108 100.00000
08103 Rio Blanco County, Colorado 6666 6666 100.00000
08113 San Miguel County, Colorado 7359 7359 100.00000
08115 Sedgwick County, Colorado 2379 2379 100.00000
08121 Washington County, Colorado 4814 4814 100.00000
08009 Baca County, Colorado 3788 3788 100.00000
08021 Conejos County, Colorado 8256 8256 100.00000
08017 Cheyenne County, Colorado 1836 1836 100.00000
08019 Clear Creek County, Colorado 9088 9088 100.00000
08049 Grand County, Colorado 14843 12260 82.59786
08083 Montezuma County, Colorado 25535 17155 67.18230
08125 Yuma County, Colorado 10043 6519 64.91088
08029 Delta County, Colorado 30952 19553 63.17201
08119 Teller County, Colorado 23350 14618 62.60385
08105 Rio Grande County, Colorado 11982 7493 62.53547
08067 La Plata County, Colorado 51334 30774 59.94857
08007 Archuleta County, Colorado 12084 7175 59.37603
08051 Gunnison County, Colorado 15324 8981 58.60741
08055 Huerfano County, Colorado 6711 3768 56.14662

##More advanced data wrangling

##Iteration, Temporal Dynamics, and Exploratory Visualization

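Let’s use iteration to download the same variables for more than one census year (2000 and 2010), calculate how each state’s rural population share changed between those years, and then visualize that change as a chart and as a map: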

my_years <- c(2000, 2010)

# For each census year in `my_years`, download state-level P001001 and P002005
# in wide form, derive the rural share in percent, and sort by state name.
# The result is a list holding one table per requested year.
population_rural_2000_2010 <- map(my_years, function(census_year) {
  state_counts <- get_decennial(
    geography = "state",
    variables = c("P001001", "P002005"),
    output = "wide",
    year = census_year
  )
  state_counts %>%
    mutate(rural_pct = (P002005 / P001001) * 100) %>%
    arrange(NAME)
})
## Getting data from the 2000 decennial Census
## Using Census Summary File 1
## Getting data from the 2010 decennial Census
## Using Census Summary File 1
# Label each list element with its census year so it can be looked up by name.
names(population_rural_2000_2010) <- my_years

# Join the two years on state name; columns present in both tables receive the
# default ".x" (2000) and ".y" (2010) suffixes. The change is the 2010 rural
# share minus the 2000 rural share, i.e. a difference in percentage points.
rural_change <- population_rural_2000_2010[["2000"]] %>%
  full_join(population_rural_2000_2010[["2010"]], by = "NAME") %>%
  mutate(rural_pct_change = rural_pct.y - rural_pct.x) %>%
  select(NAME, rural_pct_change)
rural_change
## # A tibble: 52 x 2
##    NAME                 rural_pct_change
##    <chr>                           <dbl>
##  1 Alabama                        -3.59 
##  2 Alaska                         -0.421
##  3 Arizona                        -1.64 
##  4 Arkansas                       -3.64 
##  5 California                     -0.509
##  6 Colorado                       -1.68 
##  7 Connecticut                    -0.252
##  8 Delaware                       -3.18 
##  9 District of Columbia            0    
## 10 Florida                        -1.88 
## # … with 42 more rows
# Horizontal bar chart of the change in rural share, with states ordered by
# the size of that change.
basegraph <- ggplot(
  data = rural_change,
  mapping = aes(x = reorder(NAME, rural_pct_change), y = rural_pct_change)
) +
  geom_col() +
  coord_flip()

# Add a centred title and axis labels on top of the base graph.
basegraph +
  labs(
    title = "Rural Depopulation",
    x = "State Name",
    y = "Pct Change in Rural Population"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

# Attach the change in rural share to the state polygons. A left join keeps
# the sf object on the left-hand side and adds no rows for names that occur
# only in `rural_change`. Features whose geometry is empty are then dropped:
# they cannot be drawn and make tmap warn that the shape "contains empty
# units".
rural_depop_tomap <- state_population_2010 %>%
  left_join(rural_change, by = "NAME")
rural_depop_tomap <-
  rural_depop_tomap[!st_is_empty(st_geometry(rural_depop_tomap)), ]

# First-pass choropleth: six classes chosen with the "jenks" style.
foundational_map <- tm_shape(rural_depop_tomap) +
  tm_polygons(
    col = "rural_pct_change",
    n = 6,
    style = "jenks",
    palette = "BuGn",
    midpoint = TRUE
  )

foundational_map
## Warning: The shape rural_depop_tomap contains empty units.

## Custom class breaks, a main title, and a legend placed outside the map
revised_map <- tm_shape(rural_depop_tomap) +
  tm_polygons(
    col = "rural_pct_change",
    breaks = c(-6, -4, -2, 0, 1, 2),
    palette = "YlGnBu",
    midpoint = TRUE
  ) +
  tm_layout(
    frame = FALSE,
    main.title = "Percentage Point Change\nin Rural Population, By State",
    main.title.position = "left",
    legend.outside = TRUE
  )

revised_map
## Warning: The shape rural_depop_tomap contains empty units.

Student Visualization Practice

Practice visualizing Census data by doing ONE of the following: 1) make a map (using the tmap package) that shows county-level variation in the median age across the state of Colorado or 2) make a visualization (using the ggplot2 package) of state-level variation in the median age across the entire United States.

Option 1 Code

# Variable P013001 by Colorado county from the 2010 Decennial Census, with
# geometry. `value` is renamed to `median_age` and NAME is moved to the front.
median_age_CO <- get_decennial(
  geography = "county",
  state = "CO",
  variables = "P013001",
  year = 2010,
  geometry = TRUE
) %>%
  rename(median_age = value) %>%
  relocate(NAME)

# Choropleth of median age with fixed five-year class breaks.
median_age_CO_map <- tm_shape(median_age_CO) +
  tm_polygons(
    col = "median_age",
    breaks = c(30, 35, 40, 45, 50),
    palette = "YlGnBu",
    midpoint = TRUE
  ) +
  tm_layout(
    frame = FALSE,
    main.title = "Median Age by County,\nColorado",
    main.title.position = "left",
    legend.outside = TRUE
  )

median_age_CO_map

Making a Web Map

# Switch tmap to "view" mode, then display the same map object again so it is
# rendered as a web map.
tmap_mode(mode = "view")
median_age_CO_map

Option 2 Code

# Dot plot of median age, one row per county, ordered by median age.
median_age_CO_visualization <- ggplot(
  data = median_age_CO,
  mapping = aes(x = median_age, y = reorder(NAME, median_age))
) +
  geom_point() +
  labs(
    title = "Median Age by County, CO",
    x = "Median Age",
    y = "County Name"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

median_age_CO_visualization

# Shorten the axis labels by stripping the " County, Colorado" suffix from
# NAME. Removing the suffix as a single anchored pattern avoids the trailing
# whitespace that is left behind when "Colorado", "," and "County" are removed
# separately.
median_age_CO_cleaned <- median_age_CO %>%
  mutate(County_Name = str_remove(NAME, " County, Colorado$"))

# Same dot plot as before, now labelled with the shortened county names.
median_age_CO_cleaned_visualization <- median_age_CO_cleaned %>%
  ggplot(aes(x = median_age, y = reorder(County_Name, median_age))) +
  geom_point() +
  labs(title = "Median Age by County, CO", x = "Median Age", y = "County") +
  theme(plot.title = element_text(hjust = 0.5))

median_age_CO_cleaned_visualization

American Community Survey

Calling ACS Data

To inspect the variable list for the ACS, use the “load variables” function. Let’s say we want to work with the 5-year ACS ending in 2019:

# Load the variable dictionary for the "acs5" dataset for 2019 and open it in
# the data viewer.
ACS_5_2019 <- load_variables(year = 2019, dataset = "acs5")
View(ACS_5_2019)
name label concept
B01001_001 Estimate!!Total: SEX BY AGE
B01001_002 Estimate!!Total:!!Male: SEX BY AGE
B01001_003 Estimate!!Total:!!Male:!!Under 5 years SEX BY AGE
B01001_004 Estimate!!Total:!!Male:!!5 to 9 years SEX BY AGE
B01001_005 Estimate!!Total:!!Male:!!10 to 14 years SEX BY AGE
B01001_006 Estimate!!Total:!!Male:!!15 to 17 years SEX BY AGE

Let’s issue a call to the API and generate a table that gives us the median income of every county in the United States. If we wanted the option of mapping this data later, we could set the geometry parameter equal to TRUE; here we leave it at its default, because for now we only need the table. Note that when using the “get_acs” function call, the default setting will return data from the 5-year ACS that terminates in the specified year (i.e. if the year parameter is set to 2019, the function will return the 2015-2019 ACS). If we want to call the 1 year or 3 year ACS, the “survey” argument of the “get_acs” function could be set to “acs1” or “acs3”, depending on which survey we are interested in calling.

# Variable B19013_001 for every county, year 2019. The `estimate` column is
# renamed to `median_income` and the table is sorted from highest to lowest.
median_income <- get_acs(
  geography = "county",
  variables = "B19013_001",
  year = 2019
) %>%
  rename(median_income = estimate) %>%
  arrange(desc(median_income))

View(median_income)
GEOID NAME variable median_income moe
51107 Loudoun County, Virginia B19013_001 142299 2089
51610 Falls Church city, Virginia B19013_001 127610 16144
51059 Fairfax County, Virginia B19013_001 124831 1281
06085 Santa Clara County, California B19013_001 124055 1117
06081 San Mateo County, California B19013_001 122641 1680
35028 Los Alamos County, New Mexico B19013_001 121324 4613

Manipulating and Visualizing ACS Data: dplyr’s “group_by” and “slice” functions, and visualizing uncertainty using ggplot

Let’s say that we want to generate a table that contains the highest median-income county for each state. To do so, we will use dplyr’s “group_by” and “slice” functions, after separating out the “Name” field in the existing table (which is in the form “County Name, State”) into separate “County” and “State” fields:

# For each state, keep the single county with the highest median income.
# NAME is split on ", " so that `State` carries no leading space, the top row
# per state is taken with slice_max() (ties broken by taking the first row),
# the grouping is removed so later steps operate on a plain table, and NAME is
# rebuilt while keeping the separate County and State columns.
highest_income_counties <- median_income %>%
  separate(NAME, c("County", "State"), sep = ", ") %>%
  group_by(State) %>%
  slice_max(median_income, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  unite(NAME, c("County", "State"), remove = FALSE, sep = ", ")

View(highest_income_counties)
# kable() lives in knitr, which this script never attaches; call it with its
# namespace so the line works without library(knitr).
knitr::kable(highest_income_counties)
GEOID NAME County State variable median_income moe
01117 Shelby County, Alabama Shelby County Alabama B19013_001 77799 2248
02110 Juneau City and Borough, Alaska Juneau City and Borough Alaska B19013_001 88390 4059
04013 Maricopa County, Arizona Maricopa County Arizona B19013_001 64468 326
05007 Benton County, Arkansas Benton County Arkansas B19013_001 66362 1292
06085 Santa Clara County, California Santa Clara County California B19013_001 124055 1117
08035 Douglas County, Colorado Douglas County Colorado B19013_001 119730 1710
09001 Fairfield County, Connecticut Fairfield County Connecticut B19013_001 95645 1039
10003 New Castle County, Delaware New Castle County Delaware B19013_001 73892 1210
11001 District of Columbia, District of Columbia District of Columbia District of Columbia B19013_001 86420 1008
12109 St. Johns County, Florida St. Johns County Florida B19013_001 82252 2741
13117 Forsyth County, Georgia Forsyth County Georgia B19013_001 107218 2004
15003 Honolulu County, Hawaii Honolulu County Hawaii B19013_001 85857 907
16081 Teton County, Idaho Teton County Idaho B19013_001 74216 3576
17093 Kendall County, Illinois Kendall County Illinois B19013_001 96563 4721
18057 Hamilton County, Indiana Hamilton County Indiana B19013_001 98173 2249
19049 Dallas County, Iowa Dallas County Iowa B19013_001 88479 3234
20091 Johnson County, Kansas Johnson County Kansas B19013_001 89087 998
21185 Oldham County, Kentucky Oldham County Kentucky B19013_001 99128 3974
22005 Ascension Parish, Louisiana Ascension Parish Louisiana B19013_001 80527 3017
23005 Cumberland County, Maine Cumberland County Maine B19013_001 73072 1427
24027 Howard County, Maryland Howard County Maryland B19013_001 121160 2169
25019 Nantucket County, Massachusetts Nantucket County Massachusetts B19013_001 107717 5735
26093 Livingston County, Michigan Livingston County Michigan B19013_001 84221 1674
27139 Scott County, Minnesota Scott County Minnesota B19013_001 102152 3021
28089 Madison County, Mississippi Madison County Mississippi B19013_001 71824 2728
29183 St. Charles County, Missouri St. Charles County Missouri B19013_001 84978 1195
30043 Jefferson County, Montana Jefferson County Montana B19013_001 69646 4258
31153 Sarpy County, Nebraska Sarpy County Nebraska B19013_001 82032 1552
32015 Lander County, Nevada Lander County Nevada B19013_001 88030 21398
33015 Rockingham County, New Hampshire Rockingham County New Hampshire B19013_001 93756 1893
34027 Morris County, New Jersey Morris County New Jersey B19013_001 115527 1813
35028 Los Alamos County, New Mexico Los Alamos County New Mexico B19013_001 121324 4613
36059 Nassau County, New York Nassau County New York B19013_001 116100 1093
37183 Wake County, North Carolina Wake County North Carolina B19013_001 80591 822
38105 Williams County, North Dakota Williams County North Dakota B19013_001 87161 7443
39041 Delaware County, Ohio Delaware County Ohio B19013_001 106908 2786
40017 Canadian County, Oklahoma Canadian County Oklahoma B19013_001 72056 1690
41067 Washington County, Oregon Washington County Oregon B19013_001 82215 997
42029 Chester County, Pennsylvania Chester County Pennsylvania B19013_001 100214 1232
72061 Guaynabo Municipio, Puerto Rico Guaynabo Municipio Puerto Rico B19013_001 35928 1674
44009 Washington County, Rhode Island Washington County Rhode Island B19013_001 85531 2042
45013 Beaufort County, South Carolina Beaufort County South Carolina B19013_001 68377 1987
46083 Lincoln County, South Dakota Lincoln County South Dakota B19013_001 82473 2951
47187 Williamson County, Tennessee Williamson County Tennessee B19013_001 112962 2976
48397 Rockwall County, Texas Rockwall County Texas B19013_001 100920 4011
49043 Summit County, Utah Summit County Utah B19013_001 102958 5613
50007 Chittenden County, Vermont Chittenden County Vermont B19013_001 73647 2249
51107 Loudoun County, Virginia Loudoun County Virginia B19013_001 142299 2089
53033 King County, Washington King County Washington B19013_001 94974 726
54037 Jefferson County, West Virginia Jefferson County West Virginia B19013_001 80430 3750
55133 Waukesha County, Wisconsin Waukesha County Wisconsin B19013_001 87277 1110
56039 Teton County, Wyoming Teton County Wyoming B19013_001 84678 8230

If we want to visualize this information, we can incorporate the MOE for these estimates into the visualization, so that we can convey the uncertainty surrounding these median income estimates.

# Dot plot of each state's highest-income county. Horizontal bars span the
# estimate minus and plus its margin of error (`moe`).
highest_income_counties_viz <- ggplot(
  data = highest_income_counties,
  mapping = aes(x = median_income, y = reorder(NAME, median_income))
) +
  geom_errorbarh(
    aes(xmin = median_income - moe, xmax = median_income + moe)
  ) +
  geom_point(color = "red", size = 3) +
  labs(
    title = "County with Highest Median Income, by State",
    y = "",
    x = "Median Income Estimate from 5-year ACS\n(bars indicate margin of error)"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

highest_income_counties_viz

Student Exercise: Generate a visualization of median income by county in Colorado, using the 2014-2018 ACS:

# Variable B19013_001 for Colorado counties, year 2018, sorted from highest to
# lowest estimate.
median_income_CO_2018 <- get_acs(
  geography = "county",
  state = "CO",
  variables = "B19013_001",
  year = 2018
) %>%
  rename(median_income = estimate) %>%
  arrange(desc(median_income))

# Dot plot of median income for every Colorado county, with horizontal bars
# spanning the estimate minus and plus its margin of error (`moe`).
# The " County, Colorado" suffix is removed as one anchored pattern so the
# shortened labels carry no trailing whitespace.
highest_income_counties_CO_2018_viz <- median_income_CO_2018 %>%
  mutate(County_Name = str_remove(NAME, " County, Colorado$")) %>%
  ggplot(aes(x = median_income, y = reorder(County_Name, median_income))) +
  geom_errorbarh(aes(xmin = median_income - moe, xmax = median_income + moe)) +
  geom_point(color = "blue", size = 3) +
  labs(
    title = "Median Income in Colorado, by County (2018)",
    y = "",
    x = "Median Income Estimate from 5 year ACS\n(Bars indicate margin of error)"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

highest_income_counties_CO_2018_viz

Student Exercise: Make a nationwide map of median income by county

Exploratory Correlations: Relationship Between County Median Income and County Share of Over-25 Population Without a Bachelor’s Degree (5 Year ACS ending in 2018)

# B15003 cells for the four degree levels at bachelor's and above; the vector
# names become the values of the `variable` column in the result.
education_vars <- c(
  Bachelors = "B15003_022",
  Masters = "B15003_023",
  Professional = "B15003_024",
  Doctorate = "B15003_025"
)

# County-level degree counts, with B15003_001 attached to every row as the
# summary variable (columns summary_est / summary_moe).
# The year is pinned to 2018 so this table covers the same survey as the
# 2018 median-income table it is joined to later; without it, get_acs() falls
# back to its default year.
education_acs_2018 <- get_acs(
  geography = "county",
  variables = education_vars,
  summary_var = "B15003_001",
  year = 2018
)
## Getting data from the 2014-2018 5-year ACS
View(education_acs_2018)
GEOID NAME variable estimate moe summary_est summary_moe
01001 Autauga County, Alabama Bachelors 6019 622 37367 201
01001 Autauga County, Alabama Masters 2875 412 37367 201
01001 Autauga County, Alabama Professional 499 187 37367 201
01001 Autauga County, Alabama Doctorate 536 199 37367 201
01003 Baldwin County, Alabama Bachelors 31801 1609 151112 326
01003 Baldwin County, Alabama Masters 11812 935 151112 326
# Per county (GEOID): the share of the summary total that is NOT accounted for
# by the four degree categories, in percent. The share is computed on every
# row of the group and then collapsed to one value per county with mean().
pct_less_than_BA <- education_acs_2018 %>%
  group_by(GEOID) %>%
  mutate(
    no_degree_pct = ((summary_est - sum(estimate)) / (summary_est) * 100)
  ) %>%
  summarize(no_degree_pct = mean(no_degree_pct))

View(pct_less_than_BA)
GEOID no_degree_pct
01001 73.42843
01003 68.13754
01005 88.42129
01007 89.62147
01009 86.90659
01011 87.94536
# Re-pull county median income (B19013_001) for year 2018. This overwrites the
# `median_income` object created earlier from the 2019 request.
median_income <- get_acs(
  geography = "county",
  variables = "B19013_001",
  year = 2018
) %>%
  rename(median_income = estimate) %>%
  arrange(desc(median_income))
## Getting data from the 2014-2018 5-year ACS

# Combine income with the no-degree share, matching counties on GEOID.
median_income_nodegree <- full_join(median_income, pct_less_than_BA, by = "GEOID")

View(median_income_nodegree)
# kable() lives in knitr, which this script never attaches; call it with its
# namespace so the line works without library(knitr).
knitr::kable(head(median_income_nodegree))
GEOID NAME variable median_income moe no_degree_pct
51107 Loudoun County, Virginia B19013_001 136268 2063 38.65637
51610 Falls Church city, Virginia B19013_001 124796 15295 22.44259
51059 Fairfax County, Virginia B19013_001 121133 1144 38.44545
24027 Howard County, Maryland B19013_001 117730 2023 37.43706
51013 Arlington County, Virginia B19013_001 117374 2067 24.70069
06085 Santa Clara County, California B19013_001 116178 938 47.59985
# Scatter plot of county median income against the share of residents without
# at least a bachelor's degree, with a fitted linear trend line.
# The aesthetics are declared once in ggplot() and shared by both layers.
# The x-axis label refers to the population aged 25 and over, which is the
# population the B15003 education table describes (not "under 25").
medianincome_nodegree_viz <- median_income_nodegree %>%
  ggplot(aes(x = no_degree_pct, y = median_income)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("Estimated County Median Income") +
  xlab("Estimated Percentage of 25-and-Over Population Without at least Bachelors Degree")

medianincome_nodegree_viz
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

# Split NAME ("County, State") into two columns. Splitting on ", " rather than
# "," keeps the State values -- and therefore the facet labels below -- free
# of a leading space.
median_income_nodegree <- median_income_nodegree %>%
  separate(NAME, c("County", "State"), sep = ", ")

# Same income-versus-education scatter plot with a linear fit, drawn as one
# panel per state.
medianincome_nodegree_bystate_viz <- median_income_nodegree %>%
  ggplot(aes(x = no_degree_pct, y = median_income)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ylab("County Median Income") +
  xlab("Pct No Degree") +
  facet_wrap(~State)

medianincome_nodegree_bystate_viz

Colorado Covid Cases

# Read the Colorado COVID table straight from its location on the Desktop.
# The working directory is deliberately left alone: setwd() returns the
# previous directory, so the nested setwd(setwd(...)) idiom switches back
# immediately and the file would be looked up in the wrong place.
# GEOID is converted to text and left-padded with zeros to five characters so
# it matches the Census GEOID format (e.g. "08109"); padding to a fixed width
# is also safe if the column already has its leading zero.
co_covid <- read_csv(path.expand("~/Desktop/co_covid.csv")) %>%
  mutate(
    GEOID = str_pad(as.character(GEOID), width = 5, side = "left", pad = "0")
  )

View(co_covid)
OBJECTID FULL_ GEOID LABEL STAETFP COUNTY COUNTYFP County_Pos_Cases County_Population County_Rate_Per_100_000 County_Pos_Cases_Yesterday County_Pos_Cases_Change County_Deaths County_Deaths_Yesterday County_Deaths_Change State_Pos_Cases State_Population State_Rate_Per_100000 State_Deaths State_CDC_Deaths State_Number_Hospitalizations State_Number_Tested State_Test_Encounters State_Number_of_Counties_Pos State_Number_of_Outbreaks Data_Source Date_Data_Last_Updated Shape__Area Shape__Length
1 Saguache County 08109 Saguache 8 SAGUACHE 109 327 6824 4791.91 327 0 4 4 0 444712 5763976 7715.37 6048 6126 24250 2654490 6597592 64 4100 Colorado Department of Public Health and Environment Data through March 15, 2021 0.8431490 4.391844
2 Sedgwick County 08115 Sedgwick 8 SEDGWICK 115 197 2229 8838.04 196 1 2 2 0 444712 5763976 7715.37 6048 6126 24250 2654490 6597592 64 4100 Colorado Department of Public Health and Environment Data through March 15, 2021 0.1520111 1.707546
3 Cheyenne County 08017 Cheyenne 8 CHEYENNE 17 125 1825 6849.32 125 0 5 5 0 444712 5763976 7715.37 6048 6126 24250 2654490 6597592 64 4100 Colorado Department of Public Health and Environment Data through March 15, 2021 0.4786278 3.111271
4 Custer County 08027 Custer 8 CUSTER 27 165 5059 3261.51 164 1 1 1 0 444712 5763976 7715.37 6048 6126 24250 2654490 6597592 64 4100 Colorado Department of Public Health and Environment Data through March 15, 2021 0.1968592 2.364502
5 La Plata County 08067 La Plata 8 LA PLATA 67 3169 56272 5631.58 3161 8 37 37 0 444712 5763976 7715.37 6048 6126 24250 2654490 6597592 64 4100 Colorado Department of Public Health and Environment Data through March 15, 2021 0.4472778 3.022820
6 San Juan County 08111 San Juan 8 SAN JUAN 111 43 726 5922.87 43 0 NA NA NA 444712 5763976 7715.37 6048 6126 24250 2654490 6597592 64 4100 Colorado Department of Public Health and Environment Data through March 15, 2021 0.1028337 1.716641
# Keep only the counties that appear in both tables, matched on GEOID.
co_covid_medianincome <- median_income %>%
  inner_join(co_covid, by = "GEOID")
View(co_covid_medianincome)
GEOID NAME variable median_income moe OBJECTID FULL_ LABEL STAETFP COUNTY COUNTYFP County_Pos_Cases County_Population County_Rate_Per_100_000 County_Pos_Cases_Yesterday County_Pos_Cases_Change County_Deaths County_Deaths_Yesterday County_Deaths_Change State_Pos_Cases State_Population State_Rate_Per_100000 State_Deaths State_CDC_Deaths State_Number_Hospitalizations State_Number_Tested State_Test_Encounters State_Number_of_Counties_Pos State_Number_of_Outbreaks Data_Source Date_Data_Last_Updated Shape__Area Shape__Length
08035 Douglas County, Colorado B19013_001 115314 2028 52 Douglas County Douglas 8 DOUGLAS 35 22523 351528 6407.17 22517 6 224 225 -1 444712 5763976 7715.37 6048 6126 24250 2654490 6597592 64 4100 Colorado Department of Public Health and Environment Data through March 15, 2021 0.2280495 2.246267
08039 Elbert County, Colorado B19013_001 96658 4279 59 Elbert County Elbert 8 ELBERT 39 1337 26686 5010.12 1335 2 12 12 0 444712 5763976 7715.37 6048 6126 24250 2654490 6597592 64 4100 Colorado Department of Public Health and Environment Data through March 15, 2021 0.5005127 3.295997
08014 Broomfield County, Colorado B19013_001 89624 4013 21 Broomfield County Broomfield 8 BROOMFIELD 14 3981 70762 5625.90 3982 -1 74 74 0 444712 5763976 7715.37 6048 6126 24250 2654490 6597592 64 4100 Colorado Department of Public Health and Environment Data through March 15, 2021 0.0091580 1.174637
08037 Eagle County, Colorado B19013_001 84685 4478 18 Eagle County Eagle 8 EAGLE 37 5415 55070 9832.94 5398 17 22 22 0 444712 5763976 7715.37 6048 6126 24250 2654490 6597592 64 4100 Colorado Department of Public Health and Environment Data through March 15, 2021 0.4596715 3.158051
08059 Jefferson County, Colorado B19013_001 78943 1142 12 Jefferson County Jefferson 8 JEFFERSON 59 38778 583081 6650.53 38761 17 732 730 2 444712 5763976 7715.37 6048 6126 24250 2654490 6597592 64 4100 Colorado Department of Public Health and Environment Data through March 15, 2021 0.2101877 2.863442
08013 Boulder County, Colorado B19013_001 78642 1583 15 Boulder County Boulder 8 BOULDER 13 19616 327164 5995.77 19606 10 227 228 -1 444712 5763976 7715.37 6048 6126 24250 2654490 6597592 64 4100 Colorado Department of Public Health and Environment Data through March 15, 2021 0.2025347 2.383375

correlation between income and election results

correlation between health insurance and covid

faceting

across the state of Colorado in the year 2010 OR 2) A visualization of county-level

Additional Work with Dplyr and Visualization

##county median age map?

Appendix

my_years <- c(2000, 2010)

# One state-level table per census year: P001001 and P002005 in wide form,
# plus the derived rural share in percent, sorted by state name.
fetch_rural_share <- function(census_year) {
  get_decennial(
    geography = "state",
    variables = c("P001001", "P002005"),
    output = "wide",
    year = census_year
  ) %>%
    mutate(rural_pct = (P002005 / P001001) * 100) %>%
    arrange(NAME)
}
population_rural_2000_2010 <- map(my_years, fetch_rural_share)
## Getting data from the 2000 decennial Census
## Using Census Summary File 1
## Getting data from the 2010 decennial Census
## Using Census Summary File 1
# Name the list elements by census year so each table can be pulled by name.
names(population_rural_2000_2010) <- my_years

# Join 2000 (".x" columns) to 2010 (".y" columns) on state name and keep only
# the state name and the change in rural share (2010 minus 2000).
joined_ds <- population_rural_2000_2010[["2000"]] %>%
  full_join(population_rural_2000_2010[["2010"]], by = "NAME") %>%
  mutate(pct_change = rural_pct.y - rural_pct.x) %>%
  select(NAME, pct_change)
joined_ds
## # A tibble: 52 x 2
##    NAME                 pct_change
##    <chr>                     <dbl>
##  1 Alabama                  -3.59 
##  2 Alaska                   -0.421
##  3 Arizona                  -1.64 
##  4 Arkansas                 -3.64 
##  5 California               -0.509
##  6 Colorado                 -1.68 
##  7 Connecticut              -0.252
##  8 Delaware                 -3.18 
##  9 District of Columbia      0    
## 10 Florida                  -1.88 
## # … with 42 more rows